rm(list=ls())
setwd("/Users/claytoa/Box Sync/gender/Lesotho/Elections/Submissions/JEPS/R&R/Clayton Replication Files")

##load libraries 
library(MASS)
library(car)
library(stats)
library(foreign)
library(ri)
library(texreg)
library(sandwich)
library(lmtest)
library(multiwayvcov)
library(gridExtra)


set.seed(1)


les12<-read.csv("LesothoAB2012.csv")


#basic t-tests 
t.test(BiasPol ~Quota, data= les12) # not sig
t.test(BiasRights ~Quota, data= les12) #not sig
t.test(BiasEdu ~Quota, data= les12) #not sig

#Table 1 - Explicit Bias 

##randomization inference (code adapted from Gerber & Green 2012), responses clustered by ED.
#Political Bias 
#redefine variables
Y<-les12 $BiasPol #outcome var
Z<- les12 $Quota #treatment var
clust <- les12 $District.1

probs <- genprobexact(Z,clustvar=clust)  # subjects are clustered by precinct
numiter <- 100000  # actual number of randomizations in this case is 40116600
perms <- genperms(Z,maxiter=numiter,clustvar=clust)    # clustered assignment
numiter <- ncol(perms)  # reset numiter so that it is no larger than the maximum number of possible randomizations

## show the number of observations
nrow(as.matrix(Y))

## sort the data by cluster size in order to see the range
sort(as.matrix((table(clust))))

# Estimate the ATE using (possibly biased) difference-in-means

ate <- estate(Y,Z,prob=probs)
Ys <- genouts(Y,Z,ate=0)
distout <- gendist(Ys,perms,prob=probs)

ate                                  # estimated ATE
sum(distout <= ate)                  # one-tailed comparison
sum(abs(distout) >= abs(ate))        # two-tailed comparison
dispdist(distout,ate)                

# estimate the confidence interval assuming that the true ATE=ate
Ys <- genouts(Y,Z,ate=ate)
distout <- gendist(Ys,perms,prob=probs)
dispdist(distout,ate)      


##Traditional Bias 
#redefine variables
Y<-les12 $BiasRights #outcome var

probs <- genprobexact(Z,clustvar=clust) 
numiter <- 100000  
perms <- genperms(Z,maxiter=numiter,clustvar=clust)    # clustered assignment
numiter <- ncol(perms)  # reset numiter so that it is no larger than the maximum number of possible randomizations

## show the number of observations
nrow(as.matrix(Y))

## sort the data by cluster size in order to see the range
sort(as.matrix((table(clust))))

# Estimate the ATE using (possibly biased) difference-in-means

ate <- estate(Y,Z,prob=probs)
Ys <- genouts(Y,Z,ate=0)
distout <- gendist(Ys,perms,prob=probs)

ate                                  # estimated ATE
sum(distout <= ate)                  # one-tailed comparison
sum(abs(distout) >= abs(ate))        # two-tailed comparison
dispdist(distout,ate)                

# estimate the confidence interval assuming that the true ATE=ate
Ys <- genouts(Y,Z,ate=ate)
distout <- gendist(Ys,perms,prob=probs)
dispdist(distout,ate)  


###Education Bias 
#redefine variables
Y<-les12 $BiasEdu #outcome var

probs <- genprobexact(Z,clustvar=clust)  
numiter <- 100000  
perms <- genperms(Z,maxiter=numiter,clustvar=clust)    # clustered assignment
numiter <- ncol(perms)  # reset numiter so that it is no larger than the maximum number of possible randomizations

## show the number of observations
nrow(as.matrix(Y))

## sort the data by cluster size in order to see the range
sort(as.matrix((table(clust))))

# Estimate the ATE using (possibly biased) difference-in-means

ate <- estate(Y,Z,prob=probs)
Ys <- genouts(Y,Z,ate=0)
distout <- gendist(Ys,perms,prob=probs)

ate                                  # estimated ATE
sum(distout <= ate)                  # one-tailed comparison
sum(abs(distout) >= abs(ate))        # two-tailed comparison
dispdist(distout,ate)                

# estimate the confidence interval assuming that the true ATE=ate
Ys <- genouts(Y,Z,ate=ate)
distout <- gendist(Ys,perms,prob=probs)
dispdist(distout,ate)  


##Appendix Table A1

#Test for CATES

Women<-subset(les12, les12$Female==1)
Men<-subset(les12, les12$Female==0)

# BiasPol
lm1.1<-lm(BiasPol ~ Quota* Female, data = les12)
lm1.2<-lm(BiasPol ~ Quota* Under25, data = les12)
lm1.3<-lm(BiasPol ~ Quota* Under25, data = Women)
lm1.4<-lm(BiasPol ~ Quota* Under25, data = Men)

robust1.1<-robustse.f(lm1.1, ~District.1, F)
robust1.2<-robustse.f(lm1.2, ~District.1, F)
robust1.3<-robustse.f(lm1.3, ~District.1, F)
robust1.4<-robustse.f(lm1.4, ~District.1, F)

#BiasRight 
lm2.1<-lm(BiasRights ~ Quota* Female, data = les12)
lm2.2<-lm(BiasRights ~ Quota* Under25, data = les12)
lm2.3<-lm(BiasRights ~ Quota* Under25, data = Women)
lm2.4<-lm(BiasRights ~ Quota* Under25, data = Men)

robust2.1<-robustse.f(lm2.1, ~District.1, F)
robust2.2<-robustse.f(lm2.2, ~District.1, F)
robust2.3<-robustse.f(lm2.3, ~District.1, F)
robust2.4<-robustse.f(lm2.4, ~District.1, F)

#BiasEdu 
lm3.1<-lm(BiasEdu ~ Quota* Female, data = les12)
lm3.2<-lm(BiasEdu ~ Quota* Under25, data = les12)
lm3.3<-lm(BiasEdu ~ Quota* Under25, data = Women)
lm3.4<-lm(BiasEdu ~ Quota* Under25, data = Men)

robust3.1<-robustse.f(lm3.1, ~District.1, F)
robust3.2<-robustse.f(lm3.2, ~District.1, F)
robust3.3<-robustse.f(lm3.3, ~District.1, F)
robust3.4<-robustse.f(lm3.4, ~District.1, F)


screenreg(list(robust1.1, robust1.2, robust1.3, robust1.4), custom.note= "To correct for multiple sub-group tests, coefficients with $p<0.0125$ are marked in \\textbf{bold}", bold = 0.0125, stars=numeric(0), digits=3)

screenreg(list(robust2.1, robust2.2, robust2.3, robust2.4), custom.note= "To correct for multiple sub-group tests, coefficients with $p<0.0125$ are marked in \\textbf{bold}", bold = 0.0125, stars=numeric(0), digits=3)

screenreg(list(robust3.1, robust3.2, robust3.3, robust3.4), custom.note= "To correct for multiple sub-group tests, coefficients with $p<0.0125$ are marked in \\textbf{bold}", bold = 0.0125, stars=numeric(0), digits=3)


#Figure 1 
plot1<-TwowayME.f(lm1.3, Women$Quota, Women$Under25, "Age Cohort", "Marginal Effect of Quota", 99, cluster= Women$District.1, df_correction=F, Low="Over 25", High="Under 25") +ggtitle("Political Bias \n Women Respondents")  + theme(plot.title = element_text(hjust = 0.5)) +ylim(-1.2,1.2)

plot2<-TwowayME.f(lm1.4, Men$Quota, Men $Under25, "Age Cohort", "Marginal Effect of Quota", 99, cluster= Men $District.1, df_correction=F, Low="Over 25", High="Under 25")+ggtitle("Political Bias \n Men Respondents")  + theme(plot.title = element_text(hjust = 0.5)) +ylim(-1.2,1.2)

 grid.arrange(plot1, plot2, ncol=2, nrow=1)



##Implicit Bias 

implicit<-read.csv("ImplicitBias2014.csv")
women<-subset(implicit, implicit $female ==1)
men<-subset(implicit, implicit $female ==0)

t.test(d.score ~quota, implicit) #not sig


##Table 2 
#with randomization inference (data adapted from Gerber & Green 2012)
#redefine variables
Y<-implicit $d.score #outcome var
Z<- implicit $quota #treatment var
clust <- implicit $ED

probs <- genprobexact(Z,clustvar=clust)  # subjects are clustered by ED
numiter <- 100000  # 
perms <- genperms(Z,maxiter=numiter,clustvar=clust)    # clustered assignment
numiter <- ncol(perms)  # reset numiter so that it is no larger than the maximum number of possible randomizations

## show the number of observations
nrow(as.matrix(Y))

## sort the data by cluster size in order to see the range
sort(as.matrix((table(clust))))

# Estimate the ATE using (possibly biased) difference-in-means

ate <- estate(Y,Z,prob=probs)
Ys <- genouts(Y,Z,ate=0)
distout <- gendist(Ys,perms,prob=probs)

ate                                  # estimated ATE
sum(distout <= ate)                  # one-tailed comparison
sum(abs(distout) >= abs(ate))        # two-tailed comparison
dispdist(distout,ate)                

# estimate the confidence interval assuming that the true ATE=ate
Ys <- genouts(Y,Z,ate=ate)
distout <- gendist(Ys,perms,prob=probs)
dispdist(distout,ate)      



#redefine variables
Y<-women $d.score #outcome var
Z<- women $quota #treatment var
clust <- women $ED

probs <- genprobexact(Z,clustvar=clust)  # subjects are clustered by ED
numiter <- 100000  # actual number of randomizations in this case is 40116600
perms <- genperms(Z,maxiter=numiter,clustvar=clust)    # clustered assignment
numiter <- ncol(perms)  # reset numiter so that it is no larger than the maximum number of possible randomizations

## show the number of observations
nrow(as.matrix(Y))

## sort the data by cluster size in order to see the range
sort(as.matrix((table(clust))))

# Estimate the ATE using (possibly biased) difference-in-means

ate <- estate(Y,Z,prob=probs)
Ys <- genouts(Y,Z,ate=0)
distout <- gendist(Ys,perms,prob=probs)

ate                                  # estimated ATE
sum(distout <= ate)                  # one-tailed comparison
sum(abs(distout) >= abs(ate))        # two-tailed comparison
dispdist(distout,ate)                

# estimate the confidence interval assuming that the true ATE=ate
Ys <- genouts(Y,Z,ate=ate)
distout <- gendist(Ys,perms,prob=probs)
dispdist(distout,ate)      


#redefine variables
Y<-men $d.score #outcome var
Z<- men $quota #treatment var
clust <- men $ED

probs <- genprobexact(Z,clustvar=clust)  # 
numiter <- 100000  # 
perms <- genperms(Z,maxiter=numiter,clustvar=clust)    
numiter <- ncol(perms) 

## show the number of observations
nrow(as.matrix(Y))

## sort the data by cluster size in order to see the range
sort(as.matrix((table(clust))))

# Estimate the ATE using (possibly biased) difference-in-means

ate <- estate(Y,Z,prob=probs)
Ys <- genouts(Y,Z,ate=0)
distout <- gendist(Ys,perms,prob=probs)

ate                                  # estimated ATE
sum(distout <= ate)                  # one-tailed comparison
sum(abs(distout) >= abs(ate))        # two-tailed comparison
dispdist(distout,ate)                

# estimate the confidence interval assuming that the true ATE=ate
Ys <- genouts(Y,Z,ate=ate)
distout <- gendist(Ys,perms,prob=probs)
dispdist(distout,ate)      


#Table 3

#modeld based estimates 
lm4.1<-lm(d.score ~ quota* female, data = implicit)
lm4.2<-lm(d.score ~ quota* under25, data = implicit)
lm4.3<-lm(d.score ~ quota* under25, data = women)
lm4.4<-lm(d.score ~ quota* under25, data = men)

robust4.1<-robustse.f(lm4.1, ~ED, F)
robust4.2<-robustse.f(lm4.2, ~ED, F)
robust4.3<-robustse.f(lm4.3, ~ED, F)
robust4.4<-robustse.f(lm4.4, ~ED, F)

screenreg(list(robust4.1, robust4.2, robust4.3, robust4.4), custom.note= "To correct for multiple sub-group tests, coefficients with $p<0.0125$ are marked in \\textbf{bold}", bold = 0.0125, stars=numeric(0), digits=3)

#Figure 2
#Plot Implicit Bias

plot1<-TwowayME.f(lm4.3, women$quota, women$under25, "Age Cohort", "Marginal Effect of Quota", 99, cluster= women $ED, df_correction=F, Low="Over 25", High="Under 25") +ggtitle("Implicit Bias \n Women Respondents")  + theme(plot.title = element_text(hjust = 0.5)) +ylim(-0.35,0.35)

plot2<-TwowayME.f(lm4.4, men$quota, men$under25, "Age Cohort", "Marginal Effect of Quota", 99, cluster= men $ED, df_correction=F, Low="Over 25", High="Under 25")+ggtitle("Implicit Bias \n Men Respondents")  + theme(plot.title = element_text(hjust = 0.5)) +ylim(-0.35,0.35)

grid.arrange(plot1, plot2, ncol=2, nrow=1)

